SegmentedHashSet`1.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
// NOTE: This code is derived from an implementation originally in dotnet/runtime:
// https://github.com/dotnet/runtime/blob/v8.0.3/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs
//
// See the commentary in https://github.com/dotnet/roslyn/pull/50156 for notes on incorporating changes made to the
// reference implementation.
 
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Threading;
using Microsoft.CodeAnalysis.Collections.Internal;
 
namespace Microsoft.CodeAnalysis.Collections
{
    [DebuggerTypeProxy(typeof(ICollectionDebugView<>))]
    [DebuggerDisplay("Count = {Count}")]
    internal class SegmentedHashSet<T> : ICollection<T>, ISet<T>, IReadOnlyCollection<T>
#if NET5_0_OR_GREATER
        , IReadOnlySet<T>
#endif
    {
        private const bool SupportsComparerDevirtualization
#if NET
            = true;
#else
            = false;
#endif
 
        // This uses the same array-based implementation as Dictionary<TKey, TValue>.
 
        /// <summary>Cutoff point for stackallocs. This corresponds to the number of ints.</summary>
        private const int StackAllocThreshold = 100;
 
        /// <summary>
        /// When constructing a hashset from an existing collection, it may contain duplicates,
        /// so this is used as the max acceptable excess ratio of capacity to count. Note that
        /// this is only used on the ctor and not to automatically shrink if the hashset has, e.g,
        /// a lot of adds followed by removes. Users must explicitly shrink by calling TrimExcess.
        /// This is set to 3 because capacity is acceptable as 2x rounded up to nearest prime.
        /// </summary>
        private const int ShrinkThreshold = 3;
        private const int StartOfFreeList = -3;
 
        private static IEnumerator<T>? s_emptyEnumerator;
 
        private SegmentedArray<int> _buckets;
        private SegmentedArray<Entry> _entries;
        private ulong _fastModMultiplier;
        private int _count;
        private int _freeList;
        private int _freeCount;
        private int _version;
#if NET
        private readonly IEqualityComparer<T>? _comparer;
#else
        /// <summary>
        /// <see cref="EqualityComparer{T}.Default"/> doesn't devirtualize on .NET Framework, so we always ensure
        /// <see cref="_comparer"/> is initialized to a non-<see langword="null"/> value.
        /// </summary>
        private readonly IEqualityComparer<T> _comparer;
#endif
 
        #region Constructors
 
        public SegmentedHashSet() : this((IEqualityComparer<T>?)null) { }
 
        public SegmentedHashSet(IEqualityComparer<T>? comparer)
        {
            // For reference types, we always want to store a comparer instance, either
            // the one provided, or if one wasn't provided, the default (accessing
            // EqualityComparer<TKey>.Default with shared generics on every dictionary
            // access can add measurable overhead).  For value types, if no comparer is
            // provided, or if the default is provided, we'd prefer to use
            // EqualityComparer<TKey>.Default.Equals on every use, enabling the JIT to
            // devirtualize and possibly inline the operation.
            if (!typeof(T).IsValueType)
            {
                _comparer = comparer ?? EqualityComparer<T>.Default;
            }
            else if (comparer is not null && // first check for null to avoid forcing default comparer instantiation unnecessarily
                     comparer != EqualityComparer<T>.Default)
            {
                _comparer = comparer;
            }
 
#if !NETCOREAPP
            // .NET Framework doesn't support devirtualization, so we always initialize comparer to a non-null value
            _comparer ??= EqualityComparer<T>.Default;
#endif
        }
 
        public SegmentedHashSet(int capacity) : this(capacity, null) { }
 
        public SegmentedHashSet(IEnumerable<T> collection) : this(collection, null) { }
 
        public SegmentedHashSet(IEnumerable<T> collection, IEqualityComparer<T>? comparer) : this(comparer)
        {
            if (collection == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.collection);
            }
 
            if (collection is SegmentedHashSet<T> otherAsHashSet && EqualityComparersAreEqual(this, otherAsHashSet))
            {
                ConstructFrom(otherAsHashSet);
            }
            else
            {
                // To avoid excess resizes, first set size based on collection's count. The collection may
                // contain duplicates, so call TrimExcess if resulting SegmentedHashSet is larger than the threshold.
                if (collection is ICollection<T> coll)
                {
                    var count = coll.Count;
                    if (count > 0)
                    {
                        Initialize(count);
                    }
                }
 
                UnionWith(collection);
 
                if (_count > 0 && _entries.Length / _count > ShrinkThreshold)
                {
                    TrimExcess();
                }
            }
        }
 
        public SegmentedHashSet(int capacity, IEqualityComparer<T>? comparer) : this(comparer)
        {
            if (capacity < 0)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity);
            }
 
            if (capacity > 0)
            {
                Initialize(capacity);
            }
        }
 
        /// <summary>Initializes the SegmentedHashSet from another SegmentedHashSet with the same element type and equality comparer.</summary>
        private void ConstructFrom(SegmentedHashSet<T> source)
        {
            if (source.Count == 0)
            {
                // As well as short-circuiting on the rest of the work done,
                // this avoids errors from trying to access source._buckets
                // or source._entries when they aren't initialized.
                return;
            }
 
            var capacity = source._buckets.Length;
            var threshold = HashHelpers.ExpandPrime(source.Count + 1);
 
            if (threshold >= capacity)
            {
                _buckets = (SegmentedArray<int>)source._buckets.Clone();
                _entries = (SegmentedArray<Entry>)source._entries.Clone();
                _freeList = source._freeList;
                _freeCount = source._freeCount;
                _count = source._count;
                _fastModMultiplier = source._fastModMultiplier;
            }
            else
            {
                Initialize(source.Count);
 
                var entries = source._entries;
                for (var i = 0; i < source._count; i++)
                {
                    ref var entry = ref entries[i];
                    if (entry._next >= -1)
                    {
                        AddIfNotPresent(entry._value, out _);
                    }
                }
            }
 
            Debug.Assert(Count == source.Count);
        }
 
        #endregion
 
        #region ICollection<T> methods
 
        void ICollection<T>.Add(T item) => AddIfNotPresent(item, out _);
 
        /// <summary>Removes all elements from the <see cref="SegmentedHashSet{T}"/> object.</summary>
        public void Clear()
        {
            var count = _count;
            if (count > 0)
            {
                Debug.Assert(_buckets.Length > 0, "_buckets should be non-empty");
                Debug.Assert(_entries.Length > 0, "_entries should be non-empty");
 
                SegmentedArray.Clear(_buckets);
                _count = 0;
                _freeList = -1;
                _freeCount = 0;
                SegmentedArray.Clear(_entries, 0, count);
            }
        }
 
        /// <summary>Determines whether the <see cref="SegmentedHashSet{T}"/> contains the specified element.</summary>
        /// <param name="item">The element to locate in the <see cref="SegmentedHashSet{T}"/> object.</param>
        /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object contains the specified element; otherwise, false.</returns>
        public bool Contains(T item) => FindItemIndex(item) >= 0;
 
        /// <summary>Gets the index of the item in <see cref="_entries"/>, or -1 if it's not in the set.</summary>
        private int FindItemIndex(T item)
        {
            var buckets = _buckets;
            if (buckets.Length > 0)
            {
                var entries = _entries;
                Debug.Assert(entries.Length > 0, "Expected _entries to be initialized");
 
                uint collisionCount = 0;
                var comparer = _comparer;
 
                if (SupportsComparerDevirtualization &&
                    typeof(T).IsValueType && // comparer can only be null for value types; enable JIT to eliminate entire if block for ref types
                    comparer == null)
                {
                    // ValueType: Devirtualize with EqualityComparer<TValue>.Default intrinsic
                    var hashCode = item!.GetHashCode();
                    var i = GetBucketRef(hashCode) - 1; // Value in _buckets is 1-based
                    while (i >= 0)
                    {
                        ref var entry = ref entries[i];
                        if (entry._hashCode == hashCode && EqualityComparer<T>.Default.Equals(entry._value, item))
                        {
                            return i;
                        }
                        i = entry._next;
 
                        collisionCount++;
                        if (collisionCount > (uint)entries.Length)
                        {
                            // The chain of entries forms a loop, which means a concurrent update has happened.
                            ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported();
                        }
                    }
                }
                else
                {
                    Debug.Assert(comparer is not null);
                    var hashCode = item != null ? comparer!.GetHashCode(item) : 0;
                    var i = GetBucketRef(hashCode) - 1; // Value in _buckets is 1-based
                    while (i >= 0)
                    {
                        ref var entry = ref entries[i];
                        if (entry._hashCode == hashCode && comparer!.Equals(entry._value, item))
                        {
                            return i;
                        }
                        i = entry._next;
 
                        collisionCount++;
                        if (collisionCount > (uint)entries.Length)
                        {
                            // The chain of entries forms a loop, which means a concurrent update has happened.
                            ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported();
                        }
                    }
                }
            }
 
            return -1;
        }
 
        /// <summary>Gets a reference to the specified hashcode's bucket, containing an index into <see cref="_entries"/>.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private ref int GetBucketRef(int hashCode)
        {
            var buckets = _buckets;
            return ref buckets[(int)HashHelpers.FastMod((uint)hashCode, (uint)buckets.Length, _fastModMultiplier)];
        }
 
        public bool Remove(T item)
        {
            if (_buckets.Length > 0)
            {
                var entries = _entries;
                Debug.Assert(entries.Length > 0, "entries should be non-empty");
 
                uint collisionCount = 0;
                var last = -1;
 
                IEqualityComparer<T>? comparer = _comparer;
                Debug.Assert((SupportsComparerDevirtualization && typeof(T).IsValueType) || comparer is not null);
                int hashCode =
                    SupportsComparerDevirtualization && typeof(T).IsValueType && comparer == null ? item!.GetHashCode() :
                    item is not null ? comparer!.GetHashCode(item) :
                    0;
 
                ref var bucket = ref GetBucketRef(hashCode);
                var i = bucket - 1; // Value in buckets is 1-based
 
                while (i >= 0)
                {
                    ref var entry = ref entries[i];
 
                    if (entry._hashCode == hashCode && (comparer?.Equals(entry._value, item) ?? EqualityComparer<T>.Default.Equals(entry._value, item)))
                    {
                        if (last < 0)
                        {
                            bucket = entry._next + 1; // Value in buckets is 1-based
                        }
                        else
                        {
                            entries[last]._next = entry._next;
                        }
 
                        Debug.Assert((StartOfFreeList - _freeList) < 0, "shouldn't underflow because max hashtable length is MaxPrimeArrayLength = 0x7FEFFFFD(2146435069) _freelist underflow threshold 2147483646");
                        entry._next = StartOfFreeList - _freeList;
 
#if NET
                        if (RuntimeHelpers.IsReferenceOrContainsReferences<T>())
#endif
                        {
                            entry._value = default!;
                        }
 
                        _freeList = i;
                        _freeCount++;
                        return true;
                    }
 
                    last = i;
                    i = entry._next;
 
                    collisionCount++;
                    if (collisionCount > (uint)entries.Length)
                    {
                        // The chain of entries forms a loop; which means a concurrent update has happened.
                        // Break out of the loop and throw, rather than looping forever.
                        ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported();
                    }
                }
            }
 
            return false;
        }
 
        /// <summary>Gets the number of elements that are contained in the set.</summary>
        public int Count => _count - _freeCount;
 
        bool ICollection<T>.IsReadOnly => false;
 
        #endregion
 
        #region IEnumerable methods
 
        public Enumerator GetEnumerator() => new(this);
 
        IEnumerator<T> IEnumerable<T>.GetEnumerator() =>
            Count == 0 ? GetEmptyEnumerator() :
            GetEnumerator();
 
        IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable<T>)this).GetEnumerator();
 
        private static IEnumerator<T> GetEmptyEnumerator()
        {
            return LazyInitializer.EnsureInitialized(ref s_emptyEnumerator, static () => new Enumerator(new SegmentedHashSet<T>()))!;
        }
 
        #endregion
 
        #region SegmentedHashSet methods
 
        /// <summary>Adds the specified element to the <see cref="SegmentedHashSet{T}"/>.</summary>
        /// <param name="item">The element to add to the set.</param>
        /// <returns>true if the element is added to the <see cref="SegmentedHashSet{T}"/> object; false if the element is already present.</returns>
        public bool Add(T item) => AddIfNotPresent(item, out _);
 
        /// <summary>Searches the set for a given value and returns the equal value it finds, if any.</summary>
        /// <param name="equalValue">The value to search for.</param>
        /// <param name="actualValue">The value from the set that the search found, or the default value of <typeparamref name="T"/> when the search yielded no match.</param>
        /// <returns>A value indicating whether the search was successful.</returns>
        /// <remarks>
        /// This can be useful when you want to reuse a previously stored reference instead of
        /// a newly constructed one (so that more sharing of references can occur) or to look up
        /// a value that has more complete data than the value you currently have, although their
        /// comparer functions indicate they are equal.
        /// </remarks>
        public bool TryGetValue(T equalValue, [MaybeNullWhen(false)] out T actualValue)
        {
            if (_buckets.Length > 0)
            {
                var index = FindItemIndex(equalValue);
                if (index >= 0)
                {
                    actualValue = _entries[index]._value;
                    return true;
                }
            }
 
            actualValue = default;
            return false;
        }
 
        /// <summary>Modifies the current <see cref="SegmentedHashSet{T}"/> object to contain all elements that are present in itself, the specified collection, or both.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        public void UnionWith(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            foreach (var item in other)
            {
                AddIfNotPresent(item, out _);
            }
        }
 
        /// <summary>Modifies the current <see cref="SegmentedHashSet{T}"/> object to contain only elements that are present in that object and in the specified collection.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        public void IntersectWith(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            // Intersection of anything with empty set is empty set, so return if count is 0.
            // Same if the set intersecting with itself is the same set.
            if (Count == 0 || other == this)
            {
                return;
            }
 
            // If other is known to be empty, intersection is empty set; remove all elements, and we're done.
            if (other is ICollection<T> otherAsCollection)
            {
                if (otherAsCollection.Count == 0)
                {
                    Clear();
                    return;
                }
 
                // Faster if other is a hashset using same equality comparer; so check
                // that other is a hashset using the same equality comparer.
                if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet))
                {
                    IntersectWithHashSetWithSameComparer(otherAsSet);
                    return;
                }
            }
 
            IntersectWithEnumerable(other);
        }
 
        /// <summary>Removes all elements in the specified collection from the current <see cref="SegmentedHashSet{T}"/> object.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        public void ExceptWith(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            // This is already the empty set; return.
            if (Count == 0)
            {
                return;
            }
 
            // Special case if other is this; a set minus itself is the empty set.
            if (other == this)
            {
                Clear();
                return;
            }
 
            // Remove every element in other from this.
            foreach (var element in other)
            {
                Remove(element);
            }
        }
 
        /// <summary>Modifies the current <see cref="SegmentedHashSet{T}"/> object to contain only elements that are present either in that object or in the specified collection, but not both.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        public void SymmetricExceptWith(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            // If set is empty, then symmetric difference is other.
            if (Count == 0)
            {
                UnionWith(other);
                return;
            }
 
            // Special-case this; the symmetric difference of a set with itself is the empty set.
            if (other == this)
            {
                Clear();
                return;
            }
 
            // If other is a SegmentedHashSet, it has unique elements according to its equality comparer,
            // but if they're using different equality comparers, then assumption of uniqueness
            // will fail. So first check if other is a hashset using the same equality comparer;
            // symmetric except is a lot faster and avoids bit array allocations if we can assume
            // uniqueness.
            if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet))
            {
                SymmetricExceptWithUniqueHashSet(otherAsSet);
            }
            else
            {
                SymmetricExceptWithEnumerable(other);
            }
        }
 
        /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object is a subset of the specified collection.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is a subset of <paramref name="other"/>; otherwise, false.</returns>
        public bool IsSubsetOf(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            // The empty set is a subset of any set, and a set is a subset of itself.
            // Set is always a subset of itself
            if (Count == 0 || other == this)
            {
                return true;
            }
 
            // Faster if other has unique elements according to this equality comparer; so check
            // that other is a hashset using the same equality comparer.
            if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet))
            {
                // if this has more elements then it can't be a subset
                if (Count > otherAsSet.Count)
                {
                    return false;
                }
 
                // already checked that we're using same equality comparer. simply check that
                // each element in this is contained in other.
                return IsSubsetOfHashSetWithSameComparer(otherAsSet);
            }
 
            (var uniqueCount, var unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false);
            return uniqueCount == Count && unfoundCount >= 0;
        }
 
        /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object is a proper subset of the specified collection.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is a proper subset of <paramref name="other"/>; otherwise, false.</returns>
        public bool IsProperSubsetOf(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            // No set is a proper subset of itself.
            if (other == this)
            {
                return false;
            }
 
            if (other is ICollection<T> otherAsCollection)
            {
                // No set is a proper subset of an empty set.
                if (otherAsCollection.Count == 0)
                {
                    return false;
                }
 
                // The empty set is a proper subset of anything but the empty set.
                if (Count == 0)
                {
                    return otherAsCollection.Count > 0;
                }
 
                // Faster if other is a hashset (and we're using same equality comparer).
                if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet))
                {
                    if (Count >= otherAsSet.Count)
                    {
                        return false;
                    }
 
                    // This has strictly less than number of items in other, so the following
                    // check suffices for proper subset.
                    return IsSubsetOfHashSetWithSameComparer(otherAsSet);
                }
            }
 
            (var uniqueCount, var unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false);
            return uniqueCount == Count && unfoundCount > 0;
        }
 
        /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object is a proper superset of the specified collection.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is a superset of <paramref name="other"/>; otherwise, false.</returns>
        public bool IsSupersetOf(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            // A set is always a superset of itself.
            if (other == this)
            {
                return true;
            }
 
            // Try to fall out early based on counts.
            if (other is ICollection<T> otherAsCollection)
            {
                // If other is the empty set then this is a superset.
                if (otherAsCollection.Count == 0)
                {
                    return true;
                }
 
                // Try to compare based on counts alone if other is a hashset with same equality comparer.
                if (other is SegmentedHashSet<T> otherAsSet &&
                    EqualityComparersAreEqual(this, otherAsSet) &&
                    otherAsSet.Count > Count)
                {
                    return false;
                }
            }
 
            foreach (T element in other)
            {
                if (!Contains(element))
                {
                    return false;
                }
            }
 
            return true;
        }
 
        /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object is a proper superset of the specified collection.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is a proper superset of <paramref name="other"/>; otherwise, false.</returns>
        public bool IsProperSupersetOf(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            // The empty set isn't a proper superset of any set, and a set is never a strict superset of itself.
            if (Count == 0 || other == this)
            {
                return false;
            }
 
            if (other is ICollection<T> otherAsCollection)
            {
                // If other is the empty set then this is a superset.
                if (otherAsCollection.Count == 0)
                {
                    // Note that this has at least one element, based on above check.
                    return true;
                }
 
                // Faster if other is a hashset with the same equality comparer
                if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet))
                {
                    if (otherAsSet.Count >= Count)
                    {
                        return false;
                    }
 
                    // Now perform element check.
                    return otherAsSet.IsSubsetOfHashSetWithSameComparer(this);
                }
            }
 
            // Couldn't fall out in the above cases; do it the long way
            (var uniqueCount, var unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: true);
            return uniqueCount < Count && unfoundCount == 0;
        }
 
        /// <summary>Determines whether the current <see cref="SegmentedHashSet{T}"/> object and a specified collection share common elements.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object and <paramref name="other"/> share at least one common element; otherwise, false.</returns>
        public bool Overlaps(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            if (Count == 0)
            {
                return false;
            }
 
            // Set overlaps itself
            if (other == this)
            {
                return true;
            }
 
            foreach (var element in other)
            {
                if (Contains(element))
                {
                    return true;
                }
            }
 
            return false;
        }
 
        /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object and the specified collection contain the same elements.</summary>
        /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param>
        /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is equal to <paramref name="other"/>; otherwise, false.</returns>
        public bool SetEquals(IEnumerable<T> other)
        {
            if (other == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other);
            }
 
            // A set is equal to itself.
            if (other == this)
            {
                return true;
            }
 
            // Faster if other is a hashset and we're using same equality comparer.
            if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet))
            {
                // Attempt to return early: since both contain unique elements, if they have
                // different counts, then they can't be equal.
                if (Count != otherAsSet.Count)
                {
                    return false;
                }
 
                // Already confirmed that the sets have the same number of distinct elements, so if
                // one is a subset of the other then they must be equal.
                return IsSubsetOfHashSetWithSameComparer(otherAsSet);
            }
            else
            {
                // If this count is 0 but other contains at least one element, they can't be equal.
                if (Count == 0 &&
                    other is ICollection<T> otherAsCollection &&
                    otherAsCollection.Count > 0)
                {
                    return false;
                }
 
                (var uniqueCount, var unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: true);
                return uniqueCount == Count && unfoundCount == 0;
            }
        }
 
        public void CopyTo(T[] array) => CopyTo(array, 0, Count);
 
        /// <summary>Copies the elements of a <see cref="SegmentedHashSet{T}"/> object to an array, starting at the specified array index.</summary>
        /// <param name="array">The destination array.</param>
        /// <param name="arrayIndex">The zero-based index in array at which copying begins.</param>
        public void CopyTo(T[] array, int arrayIndex) => CopyTo(array, arrayIndex, Count);
 
        public void CopyTo(T[] array, int arrayIndex, int count)
        {
            if (array == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array);
            }
 
#if NET8_0_OR_GREATER
            ArgumentOutOfRangeException.ThrowIfNegative(arrayIndex);
            ArgumentOutOfRangeException.ThrowIfNegative(count);
#else
            // Check array index valid index into array.
            if (arrayIndex < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(arrayIndex), arrayIndex, SR.ArgumentOutOfRange_NeedNonNegNum);
            }
 
            // Also throw if count less than 0.
            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count), count, SR.ArgumentOutOfRange_NeedNonNegNum);
            }
#endif
 
            // Will the array, starting at arrayIndex, be able to hold elements? Note: not
            // checking arrayIndex >= array.Length (consistency with list of allowing
            // count of 0; subsequent check takes care of the rest)
            if (arrayIndex > array.Length || count > array.Length - arrayIndex)
            {
                ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_ArrayPlusOffTooSmall);
            }
 
            var entries = _entries;
            for (var i = 0; i < _count && count != 0; i++)
            {
                ref var entry = ref entries[i];
                if (entry._next >= -1)
                {
                    array[arrayIndex++] = entry._value;
                    count--;
                }
            }
        }
 
        /// <summary>Removes all elements that match the conditions defined by the specified predicate from a <see cref="SegmentedHashSet{T}"/> collection.</summary>
        public int RemoveWhere(Predicate<T> match)
        {
            if (match == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.match);
            }
 
            var entries = _entries;
            var numRemoved = 0;
            for (var i = 0; i < _count; i++)
            {
                ref var entry = ref entries[i];
                if (entry._next >= -1)
                {
                    // Cache value in case delegate removes it
                    var value = entry._value;
                    if (match(value))
                    {
                        // Check again that remove actually removed it.
                        if (Remove(value))
                        {
                            numRemoved++;
                        }
                    }
                }
            }
 
            return numRemoved;
        }
 
        /// <summary>Gets the <see cref="IEqualityComparer"/> object that is used to determine equality for the values in the set.</summary>
        public IEqualityComparer<T> Comparer
        {
            get
            {
                return _comparer ?? EqualityComparer<T>.Default;
            }
        }
 
        /// <summary>Ensures that this hash set can hold the specified number of elements without growing.</summary>
        public int EnsureCapacity(int capacity)
        {
            if (capacity < 0)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity);
            }
 
            var currentCapacity = _entries.Length;
            if (currentCapacity >= capacity)
            {
                return currentCapacity;
            }
 
            if (_buckets.Length == 0)
            {
                return Initialize(capacity);
            }
 
            var newSize = HashHelpers.GetPrime(capacity);
            Resize(newSize);
            return newSize;
        }
 
        private void Resize() => Resize(HashHelpers.ExpandPrime(_count));
 
        private void Resize(int newSize)
        {
            Debug.Assert(_entries.Length > 0, "_entries should be non-empty");
            Debug.Assert(newSize >= _entries.Length);
 
            var count = _count;
 
            // Rather than creating a copy of _entries, instead reuse as much of it's data as possible.
            var entries = CreateNewSegmentedArrayReusingOldSegments(_entries, newSize);
 
            // Assign member variables after both arrays allocated to guard against corruption from OOM if second fails
            _buckets = new SegmentedArray<int>(newSize);
            _fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)newSize);
            for (var i = 0; i < count; i++)
            {
                ref var entry = ref entries[i];
                if (entry._next >= -1)
                {
                    ref var bucket = ref GetBucketRef(entry._hashCode);
                    entry._next = bucket - 1; // Value in _buckets is 1-based
                    bucket = i + 1;
                }
            }
 
            _entries = entries;
        }
 
        private static SegmentedArray<Entry> CreateNewSegmentedArrayReusingOldSegments(SegmentedArray<Entry> oldArray, int newSize)
        {
            var segments = SegmentedCollectionsMarshal.AsSegments(oldArray);
 
            var oldSegmentCount = segments.Length;
            var newSegmentCount = (newSize + SegmentedArrayHelper.GetSegmentSize<Entry>() - 1) >> SegmentedArrayHelper.GetSegmentShift<Entry>();
 
            // Grow the array of segments, if necessary
            Array.Resize(ref segments, newSegmentCount);
 
            // Resize all segments to full segment size from the last old segment to the next to last
            // new segment.
            for (var i = oldSegmentCount - 1; i < newSegmentCount - 1; i++)
                Array.Resize(ref segments[i], SegmentedArrayHelper.GetSegmentSize<Entry>());
 
            // Resize the last segment
            var lastSegmentSize = newSize - ((newSegmentCount - 1) << SegmentedArrayHelper.GetSegmentShift<Entry>());
            Array.Resize(ref segments[newSegmentCount - 1], lastSegmentSize);
 
            return SegmentedCollectionsMarshal.AsSegmentedArray(newSize, segments);
        }
 
        /// <summary>
        /// Sets the capacity of a <see cref="SegmentedHashSet{T}"/> object to the actual number of elements it contains,
        /// rounded up to a nearby, implementation-specific value.
        /// </summary>
        public void TrimExcess()
        {
            var capacity = Count;
 
            var newSize = HashHelpers.GetPrime(capacity);
            var oldEntries = _entries;
            var currentCapacity = oldEntries.Length;
            if (newSize >= currentCapacity)
            {
                return;
            }
 
            var oldCount = _count;
            _version++;
            Initialize(newSize);
            var entries = _entries;
            var count = 0;
            for (var i = 0; i < oldCount; i++)
            {
                var hashCode = oldEntries[i]._hashCode; // At this point, we know we have entries.
                if (oldEntries[i]._next >= -1)
                {
                    ref var entry = ref entries[count];
                    entry = oldEntries[i];
                    ref var bucket = ref GetBucketRef(hashCode);
                    entry._next = bucket - 1; // Value in _buckets is 1-based
                    bucket = count + 1;
                    count++;
                }
            }
 
            _count = capacity;
            _freeCount = 0;
        }
 
        #endregion
 
        #region Helper methods
 
        /// <summary>Returns an <see cref="IEqualityComparer"/> object that can be used for equality testing of a <see cref="SegmentedHashSet{T}"/> object.</summary>
        public static IEqualityComparer<SegmentedHashSet<T>> CreateSetComparer() => new SegmentedHashSetEqualityComparer<T>();
 
        /// <summary>
        /// Initializes buckets and slots arrays. Uses suggested capacity by finding next prime
        /// greater than or equal to capacity.
        /// </summary>
        private int Initialize(int capacity)
        {
            var size = HashHelpers.GetPrime(capacity);
            var buckets = new SegmentedArray<int>(size);
            var entries = new SegmentedArray<Entry>(size);
 
            // Assign member variables after both arrays are allocated to guard against corruption from OOM if second fails.
            _freeList = -1;
            _buckets = buckets;
            _entries = entries;
            _fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)size);
 
            return size;
        }
 
        /// <summary>Adds the specified element to the set if it's not already contained.</summary>
        /// <param name="value">The element to add to the set.</param>
        /// <param name="location">The index into <see cref="_entries"/> of the element.</param>
        /// <returns>true if the element is added to the <see cref="SegmentedHashSet{T}"/> object; false if the element is already present.</returns>
        private bool AddIfNotPresent(T value, out int location)
        {
            if (_buckets.Length == 0)
            {
                Initialize(0);
            }
            Debug.Assert(_buckets.Length > 0);
 
            var entries = _entries;
            Debug.Assert(entries.Length > 0, "expected entries to be non-empty");
 
            var comparer = _comparer;
            int hashCode;
 
            uint collisionCount = 0;
            ref var bucket = ref RoslynUnsafe.NullRef<int>();
 
            if (SupportsComparerDevirtualization &&
                typeof(T).IsValueType && // comparer can only be null for value types; enable JIT to eliminate entire if block for ref types
                comparer == null)
            {
                hashCode = value!.GetHashCode();
                bucket = ref GetBucketRef(hashCode);
                var i = bucket - 1; // Value in _buckets is 1-based
 
                // ValueType: Devirtualize with EqualityComparer<TValue>.Default intrinsic
                while (i >= 0)
                {
                    ref var entry = ref entries[i];
                    if (entry._hashCode == hashCode && EqualityComparer<T>.Default.Equals(entry._value, value))
                    {
                        location = i;
                        return false;
                    }
                    i = entry._next;
 
                    collisionCount++;
                    if (collisionCount > (uint)entries.Length)
                    {
                        // The chain of entries forms a loop, which means a concurrent update has happened.
                        ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported();
                    }
                }
            }
            else
            {
                Debug.Assert(comparer is not null);
                hashCode = value != null ? comparer!.GetHashCode(value) : 0;
                bucket = ref GetBucketRef(hashCode);
                var i = bucket - 1; // Value in _buckets is 1-based
                while (i >= 0)
                {
                    ref var entry = ref entries[i];
                    if (entry._hashCode == hashCode && comparer!.Equals(entry._value, value))
                    {
                        location = i;
                        return false;
                    }
                    i = entry._next;
 
                    collisionCount++;
                    if (collisionCount > (uint)entries.Length)
                    {
                        // The chain of entries forms a loop, which means a concurrent update has happened.
                        ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported();
                    }
                }
            }
 
            int index;
            if (_freeCount > 0)
            {
                index = _freeList;
                _freeCount--;
                Debug.Assert((StartOfFreeList - entries[_freeList]._next) >= -1, "shouldn't overflow because `next` cannot underflow");
                _freeList = StartOfFreeList - entries[_freeList]._next;
            }
            else
            {
                var count = _count;
                if (count == entries.Length)
                {
                    Resize();
                    bucket = ref GetBucketRef(hashCode);
                }
                index = count;
                _count = count + 1;
                entries = _entries;
            }
 
            {
                ref var entry = ref entries[index];
                entry._hashCode = hashCode;
                entry._next = bucket - 1; // Value in _buckets is 1-based
                entry._value = value;
                bucket = index + 1;
                _version++;
                location = index;
            }
 
            return true;
        }
 
        /// <summary>
        /// Implementation Notes:
        /// If other is a hashset and is using same equality comparer, then checking subset is
        /// faster. Simply check that each element in this is in other.
        ///
        /// Note: if other doesn't use same equality comparer, then Contains check is invalid,
        /// which is why callers must take are of this.
        ///
        /// If callers are concerned about whether this is a proper subset, they take care of that.
        /// </summary>
        internal bool IsSubsetOfHashSetWithSameComparer(SegmentedHashSet<T> other)
        {
            foreach (var item in this)
            {
                if (!other.Contains(item))
                {
                    return false;
                }
            }
 
            return true;
        }
 
        /// <summary>
        /// If other is a hashset that uses same equality comparer, intersect is much faster
        /// because we can use other's Contains
        /// </summary>
        private void IntersectWithHashSetWithSameComparer(SegmentedHashSet<T> other)
        {
            var entries = _entries;
            for (var i = 0; i < _count; i++)
            {
                ref var entry = ref entries[i];
                if (entry._next >= -1)
                {
                    var item = entry._value;
                    if (!other.Contains(item))
                    {
                        Remove(item);
                    }
                }
            }
        }
 
        /// <summary>
        /// Iterate over other. If contained in this, mark an element in bit array corresponding to
        /// its position in _slots. If anything is unmarked (in bit array), remove it.
        ///
        /// This attempts to allocate on the stack, if below StackAllocThreshold.
        /// </summary>
        private unsafe void IntersectWithEnumerable(IEnumerable<T> other)
        {
            Debug.Assert(_buckets.Length > 0, "_buckets shouldn't be empty; callers should check first");
 
            // Keep track of current last index; don't want to move past the end of our bit array
            // (could happen if another thread is modifying the collection).
            var originalCount = _count;
            int intArrayLength = BitHelper.ToIntArrayLength(originalCount);
 
            Span<int> span = stackalloc int[StackAllocThreshold];
            var bitHelper = intArrayLength <= StackAllocThreshold
                ? new BitHelper(span.Slice(0, intArrayLength), clear: true)
                : new BitHelper(new int[intArrayLength], clear: false);
 
            // Mark if contains: find index of in slots array and mark corresponding element in bit array.
            foreach (var item in other)
            {
                var index = FindItemIndex(item);
                if (index >= 0)
                {
                    bitHelper.MarkBit(index);
                }
            }
 
            // If anything unmarked, remove it. Perf can be optimized here if BitHelper had a
            // FindFirstUnmarked method.
            for (var i = 0; i < originalCount; i++)
            {
                ref var entry = ref _entries[i];
                if (entry._next >= -1 && !bitHelper.IsMarked(i))
                {
                    Remove(entry._value);
                }
            }
        }
 
        /// <summary>
        /// if other is a set, we can assume it doesn't have duplicate elements, so use this
        /// technique: if can't remove, then it wasn't present in this set, so add.
        ///
        /// As with other methods, callers take care of ensuring that other is a hashset using the
        /// same equality comparer.
        /// </summary>
        /// <param name="other"></param>
        private void SymmetricExceptWithUniqueHashSet(SegmentedHashSet<T> other)
        {
            foreach (var item in other)
            {
                if (!Remove(item))
                {
                    AddIfNotPresent(item, out _);
                }
            }
        }
 
        /// <summary>
        /// Implementation notes:
        ///
        /// Used for symmetric except when other isn't a SegmentedHashSet. This is more tedious because
        /// other may contain duplicates. SegmentedHashSet technique could fail in these situations:
        /// 1. Other has a duplicate that's not in this: SegmentedHashSet technique would add then
        /// remove it.
        /// 2. Other has a duplicate that's in this: SegmentedHashSet technique would remove then add it
        /// back.
        /// In general, its presence would be toggled each time it appears in other.
        ///
        /// This technique uses bit marking to indicate whether to add/remove the item. If already
        /// present in collection, it will get marked for deletion. If added from other, it will
        /// get marked as something not to remove.
        ///
        /// </summary>
        /// <param name="other"></param>
        private unsafe void SymmetricExceptWithEnumerable(IEnumerable<T> other)
        {
            var originalCount = _count;
            int intArrayLength = BitHelper.ToIntArrayLength(originalCount);
 
            Span<int> itemsToRemoveSpan = stackalloc int[StackAllocThreshold / 2];
            var itemsToRemove = intArrayLength <= StackAllocThreshold / 2
                ? new BitHelper(itemsToRemoveSpan.Slice(0, intArrayLength), clear: true)
                : new BitHelper(new int[intArrayLength], clear: false);
 
            Span<int> itemsAddedFromOtherSpan = stackalloc int[StackAllocThreshold / 2];
            var itemsAddedFromOther = intArrayLength <= StackAllocThreshold / 2
                ? new BitHelper(itemsAddedFromOtherSpan.Slice(0, intArrayLength), clear: true)
                : new BitHelper(new int[intArrayLength], clear: false);
 
            foreach (var item in other)
            {
                if (AddIfNotPresent(item, out var location))
                {
                    // wasn't already present in collection; flag it as something not to remove
                    // *NOTE* if location is out of range, we should ignore. BitHelper will
                    // detect that it's out of bounds and not try to mark it. But it's
                    // expected that location could be out of bounds because adding the item
                    // will increase _lastIndex as soon as all the free spots are filled.
                    itemsAddedFromOther.MarkBit(location);
                }
                else
                {
                    // already there...if not added from other, mark for remove.
                    // *NOTE* Even though BitHelper will check that location is in range, we want
                    // to check here. There's no point in checking items beyond originalCount
                    // because they could not have been in the original collection
                    if (location < originalCount && !itemsAddedFromOther.IsMarked(location))
                    {
                        itemsToRemove.MarkBit(location);
                    }
                }
            }
 
            // if anything marked, remove it
            for (var i = 0; i < originalCount; i++)
            {
                if (itemsToRemove.IsMarked(i))
                {
                    Remove(_entries[i]._value);
                }
            }
        }
 
        /// <summary>
        /// Determines counts that can be used to determine equality, subset, and superset. This
        /// is only used when other is an IEnumerable and not a SegmentedHashSet. If other is a SegmentedHashSet
        /// these properties can be checked faster without use of marking because we can assume
        /// other has no duplicates.
        ///
        /// The following count checks are performed by callers:
        /// 1. Equals: checks if unfoundCount = 0 and uniqueFoundCount = _count; i.e. everything
        /// in other is in this and everything in this is in other
        /// 2. Subset: checks if unfoundCount >= 0 and uniqueFoundCount = _count; i.e. other may
        /// have elements not in this and everything in this is in other
        /// 3. Proper subset: checks if unfoundCount > 0 and uniqueFoundCount = _count; i.e
        /// other must have at least one element not in this and everything in this is in other
        /// 4. Proper superset: checks if unfound count = 0 and uniqueFoundCount strictly less
        /// than _count; i.e. everything in other was in this and this had at least one element
        /// not contained in other.
        ///
        /// An earlier implementation used delegates to perform these checks rather than returning
        /// an ElementCount struct; however this was changed due to the perf overhead of delegates.
        /// </summary>
        /// <param name="other"></param>
        /// <param name="returnIfUnfound">Allows us to finish faster for equals and proper superset
        /// because unfoundCount must be 0.</param>
        private (int UniqueCount, int UnfoundCount) CheckUniqueAndUnfoundElements(IEnumerable<T> other, bool returnIfUnfound)
        {
            // Need special case in case this has no elements.
            if (_count == 0)
            {
                var numElementsInOther = 0;
                foreach (var item in other)
                {
                    numElementsInOther++;
                    break; // break right away, all we want to know is whether other has 0 or 1 elements
                }
 
                return (UniqueCount: 0, UnfoundCount: numElementsInOther);
            }
 
            Debug.Assert((_buckets.Length > 0) && (_count > 0), "_buckets was empty but count greater than 0");
 
            var originalCount = _count;
            int intArrayLength = BitHelper.ToIntArrayLength(originalCount);
 
            Span<int> span = stackalloc int[StackAllocThreshold];
            var bitHelper = intArrayLength <= StackAllocThreshold
                ? new BitHelper(span.Slice(0, intArrayLength), clear: true)
                : new BitHelper(new int[intArrayLength], clear: false);
 
            var unfoundCount = 0; // count of items in other not found in this
            var uniqueFoundCount = 0; // count of unique items in other found in this
 
            foreach (var item in other)
            {
                var index = FindItemIndex(item);
                if (index >= 0)
                {
                    if (!bitHelper.IsMarked(index))
                    {
                        // Item hasn't been seen yet.
                        bitHelper.MarkBit(index);
                        uniqueFoundCount++;
                    }
                }
                else
                {
                    unfoundCount++;
                    if (returnIfUnfound)
                    {
                        break;
                    }
                }
            }
 
            return (uniqueFoundCount, unfoundCount);
        }
 
        /// <summary>
        /// Checks if equality comparers are equal. This is used for algorithms that can
        /// speed up if it knows the other item has unique elements. I.e. if they're using
        /// different equality comparers, then uniqueness assumption between sets break.
        /// </summary>
        internal static bool EqualityComparersAreEqual(SegmentedHashSet<T> set1, SegmentedHashSet<T> set2) => set1.Comparer.Equals(set2.Comparer);
 
        #endregion
 
        private struct Entry
        {
            public int _hashCode;
            /// <summary>
            /// 0-based index of next entry in chain: -1 means end of chain
            /// also encodes whether this entry _itself_ is part of the free list by changing sign and subtracting 3,
            /// so -2 means end of free list, -3 means index 0 but on free list, -4 means index 1 but on free list, etc.
            /// </summary>
            public int _next;
            public T _value;
        }
 
        public struct Enumerator : IEnumerator<T>
        {
            private readonly SegmentedHashSet<T> _hashSet;
            private readonly int _version;
            private int _index;
            private T _current;
 
            internal Enumerator(SegmentedHashSet<T> hashSet)
            {
                _hashSet = hashSet;
                _version = hashSet._version;
                _index = 0;
                _current = default!;
            }
 
            public bool MoveNext()
            {
                if (_version != _hashSet._version)
                {
                    ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion();
                }
 
                // Use unsigned comparison since we set index to dictionary.count+1 when the enumeration ends.
                // dictionary.count+1 could be negative if dictionary.count is int.MaxValue
                while ((uint)_index < (uint)_hashSet._count)
                {
                    ref var entry = ref _hashSet._entries[_index++];
                    if (entry._next >= -1)
                    {
                        _current = entry._value;
                        return true;
                    }
                }
 
                _index = _hashSet._count + 1;
                _current = default!;
                return false;
            }
 
            public readonly T Current => _current;
 
            public readonly void Dispose() { }
 
            readonly object? IEnumerator.Current
            {
                get
                {
                    if (_index == 0 || (_index == _hashSet._count + 1))
                    {
                        ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumOpCantHappen();
                    }
 
                    return _current;
                }
            }
 
            void IEnumerator.Reset()
            {
                if (_version != _hashSet._version)
                {
                    ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion();
                }
 
                _index = 0;
                _current = default!;
            }
        }
    }
}
File: src\Dependencies\Collections\SegmentedHashSet`1.cs	Web Access
Project: src\src\Compilers\Core\Portable\Microsoft.CodeAnalysis.csproj (Microsoft.CodeAnalysis)